package cn.tyoui.core;

import cn.tyoui.pojo.ProxyIP;
import cn.tyoui.proxy.CrawlerProxyIP;
import cn.tyoui.httpclient.HttpCrawler;

import java.io.File;
import java.io.InputStream;
import java.util.*;

/**
 * 爬虫控制类
 *
 * @author Tyoui
 * @version 1.8.1
 */
public class ControllerCrawler {

    /** Configuration loaded from crawler.properties; keys are stored lower-cased. */
    private Map<String, String> map = new HashMap<>();
    /** Directory where crawled HTML files are saved (default shown below). */
    private String dir = "c:\\crawler";
    /** URL to crawl when none is configured. */
    private String url = "http://www.baidu.com";
    /** Index range assigned to this instance when work is split across threads; 0/0 means "not split". */
    private int threadStart, threadEnd;

    /**
     * Starts the crawler: loads {@code /crawler.properties} from the classpath into
     * the configuration map (keys lower-cased so {@link #changer(String, String)}
     * lookups are case-insensitive) and, when no thread range was assigned via
     * {@link #setThread(int, int)}, immediately runs {@link #initCrawler()}.
     *
     * @throws Exception if the properties resource is missing or unreadable,
     *                   or if crawler initialization fails
     */
    public void start() throws Exception {
        Properties properties = new Properties();
        // try-with-resources closes the stream even if load() throws; the original
        // leaked the stream on a load failure and NPE'd when the resource was absent
        try (InputStream in = this.getClass().getResourceAsStream("/crawler.properties")) {
            if (in == null)
                throw new IllegalStateException("crawler.properties not found on classpath");
            properties.load(in);
        }
        // store keys lower-cased so configuration lookups are case-insensitive
        for (String key : properties.stringPropertyNames())
            map.put(key.toLowerCase(), properties.getProperty(key));
        if (threadEnd == 0 && threadStart == 0)
            initCrawler();
    }

    /**
     * Initializes and runs the crawler according to the loaded configuration.
     * Optionally sets up a proxy-IP source first (free proxies harvested from
     * xicidaili, a user-supplied API endpoint, or a local proxy list file),
     * then crawls either a single URL or an index range of URLs.
     *
     * @throws Exception on configuration, network, or I/O errors
     */
    public void initCrawler() throws Exception {
        HttpCrawler httpCrawler = new HttpCrawler();
        CrawlerProxyIP crawlerProxyIP = new CrawlerProxyIP();
        try {
            String proxy = changer("isProxyIP", null);
            if (proxy != null) {
                boolean useFreeIP = Boolean.parseBoolean(changer("isFreeIP", "true"));
                if (useFreeIP) {
                    // each xicidaili listing page holds 100 proxies, so pages = total / 100
                    int pages = Integer.parseInt(changer("ProxyAllNum", "500")) / 100;
                    int maxIp = Integer.parseInt(changer("MaxProxyIP", "100"));
                    for (int i = 1; i <= pages; i++) {
                        try {
                            String html = crawlerProxyIP.crawler("http://www.xicidaili.com/nn/" + i);
                            crawlerProxyIP.proxyRead(html);
                        } catch (Exception e) {
                            // best effort: one failed listing page must not abort the harvest
                            e.printStackTrace();
                        }
                    }
                    crawlerProxyIP.writeIP(maxIp);
                    String path = new File("").getCanonicalPath() + File.separator + "text";
                    httpCrawler.proxyInit(path + File.separator + "ip.txt");
                } else {
                    String api = changer("ProxyIpAPI", null);
                    if (api != null) {
                        // fetch the proxy list from a user-supplied API endpoint
                        String html = crawlerProxyIP.crawler(api);
                        List<ProxyIP> list = crawlerProxyIP.htmlListIP(html, changer("apiSplit", "\n"));
                        httpCrawler.setList(list);
                    } else {
                        // fall back to a user-maintained local proxy list file
                        String text = changer("oneselfProxyIPText", new File(".").getAbsolutePath());
                        oneselfProxyIP(httpCrawler, text);
                    }
                }
            }
            // random delay bounds between requests; max is stored as an offset above min
            int min = Integer.parseInt(changer("minTime", "0"));
            int max = Integer.parseInt(changer("maxTime", "0")) - min;
            url = changer("URL", url);
            dir = changer("CrawlerHtmlDir", dir);
            httpCrawler.setDir(dir);
            String joinEnd = changer("endIndex", null);
            if (joinEnd != null) {
                int end = Integer.parseInt(joinEnd);
                int start = Integer.parseInt(changer("startIndex", "0"));
                String urlPrefix = changer("URLPrefix", "");
                String urlSuffix = changer("URLSuffix", "");
                // use the thread-specific bounds when a range was assigned via setThread()
                boolean wholeRange = threadStart == 0 && threadEnd == 0;
                int from = wholeRange ? start : threadStart;
                int to = wholeRange ? end : threadEnd;
                for (int i = from; i < to; i++)
                    httpCrawler.startCrawler(url + urlPrefix + i + urlSuffix, min, max);
            } else {
                httpCrawler.startCrawler(url, min, max);
            }
        } finally {
            // always release the crawler; the original leaked it when any step threw
            httpCrawler.close();
        }
    }

    /**
     * Looks up a configuration value with a fallback.
     *
     * @param key    properties key (matched case-insensitively)
     * @param values default returned when the key is absent
     * @return the configured value, or {@code values} if the key is not set
     */
    public String changer(String key, String values) {
        String value = map.get(key.toLowerCase());
        if (value != null)
            return value;
        return values;
    }


    /**
     * Initializes the crawler's proxies from a user-maintained proxy list file.
     * Failures are logged and swallowed so crawling can proceed without proxies.
     *
     * @param httpCrawler crawler instance to configure
     * @param textIP      path to the proxy IP list file
     */
    private void oneselfProxyIP(HttpCrawler httpCrawler, String textIP) {
        try {
            httpCrawler.proxyInit(textIP);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }


    /**
     * Assigns this instance a sub-range of the crawl index space, for splitting
     * work across multiple threads. Leaving both at 0 crawls the whole range.
     *
     * @param start first index (inclusive)
     * @param end   last index (exclusive)
     */
    public void setThread(int start, int end) {
        this.threadStart = start;
        this.threadEnd = end;
    }

}
